library(ggplot2)
library(stringr)
library (caTools)
library(ISLR)
library(caret)
Loading required package: lattice
# Read the raw movie metadata CSV into a data frame, echo it, and report
# its dimensions (rows, columns).
IMDB <- read.csv(
  file = "F:/IIT-C/Sem-3/DPA/Project/Movie_dataset/movie_metadata.csv"
)
IMDB
dim(IMDB)
[1] 5043 28
# Per-column summary of the freshly loaded data (quantiles for numeric
# columns, level counts for factor columns, plus NA counts).
summary(IMDB)
color director_name num_critic_for_reviews duration
: 19 : 104 Min. : 1.0 Min. : 7.0
Black and White: 209 Steven Spielberg: 26 1st Qu.: 50.0 1st Qu.: 93.0
Color :4815 Woody Allen : 22 Median :110.0 Median :103.0
Clint Eastwood : 20 Mean :140.2 Mean :107.2
Martin Scorsese : 20 3rd Qu.:195.0 3rd Qu.:118.0
Ridley Scott : 17 Max. :813.0 Max. :511.0
(Other) :4834 NA's :50 NA's :15
director_facebook_likes actor_3_facebook_likes actor_2_name actor_1_facebook_likes
Min. : 0.0 Min. : 0.0 Morgan Freeman : 20 Min. : 0
1st Qu.: 7.0 1st Qu.: 133.0 Charlize Theron: 15 1st Qu.: 614
Median : 49.0 Median : 371.5 Brad Pitt : 14 Median : 988
Mean : 686.5 Mean : 645.0 : 13 Mean : 6560
3rd Qu.: 194.5 3rd Qu.: 636.0 James Franco : 11 3rd Qu.: 11000
Max. :23000.0 Max. :23000.0 Meryl Streep : 11 Max. :640000
NA's :104 NA's :23 (Other) :4959 NA's :7
gross genres actor_1_name
Min. : 162 Drama : 236 Robert De Niro : 49
1st Qu.: 5340988 Comedy : 209 Johnny Depp : 41
Median : 25517500 Comedy|Drama : 191 Nicolas Cage : 33
Mean : 48468408 Comedy|Drama|Romance: 187 J.K. Simmons : 31
3rd Qu.: 62309438 Comedy|Romance : 158 Bruce Willis : 30
Max. :760505847 Drama|Romance : 152 Denzel Washington: 30
NA's :884 (Other) :3910 (Other) :4829
movie_title num_voted_users cast_total_facebook_likes actor_3_name
Ben-Hur : 3 Min. : 5 Min. : 0 : 23
Halloween : 3 1st Qu.: 8594 1st Qu.: 1411 Ben Mendelsohn: 8
Home : 3 Median : 34359 Median : 3090 John Heard : 8
King Kong : 3 Mean : 83668 Mean : 9699 Steve Coogan : 8
Pan : 3 3rd Qu.: 96309 3rd Qu.: 13756 Anne Hathaway : 7
The Fast and the Furious : 3 Max. :1689764 Max. :656730 Jon Gries : 7
(Other) :5025 (Other) :4982
facenumber_in_poster
Min. : 0.000
1st Qu.: 0.000
Median : 1.000
Mean : 1.371
3rd Qu.: 2.000
Max. :43.000
NA's :13
plot_keywords
: 153
based on novel : 4
1940s|child hero|fantasy world|orphan|reference to peter pan : 3
alien friendship|alien invasion|australia|flying car|mother daughter relationship: 3
animal name in title|ape abducts a woman|gorilla|island|king kong : 3
assistant|experiment|frankenstein|medical student|scientist : 3
(Other) :4874
movie_imdb_link num_user_for_reviews language
http://www.imdb.com/title/tt0077651/?ref_=fn_tt_tt_1: 3 Min. : 1.0 English :4704
http://www.imdb.com/title/tt0232500/?ref_=fn_tt_tt_1: 3 1st Qu.: 65.0 French : 73
http://www.imdb.com/title/tt0360717/?ref_=fn_tt_tt_1: 3 Median : 156.0 Spanish : 40
http://www.imdb.com/title/tt1976009/?ref_=fn_tt_tt_1: 3 Mean : 272.8 Hindi : 28
http://www.imdb.com/title/tt2224026/?ref_=fn_tt_tt_1: 3 3rd Qu.: 326.0 Mandarin: 26
http://www.imdb.com/title/tt2638144/?ref_=fn_tt_tt_1: 3 Max. :5060.0 German : 19
(Other) :5025 NA's :21 (Other) : 153
country content_rating budget title_year actor_2_facebook_likes
USA :3807 R :2118 Min. :2.180e+02 Min. :1916 Min. : 0
UK : 448 PG-13 :1461 1st Qu.:6.000e+06 1st Qu.:1999 1st Qu.: 281
France : 154 PG : 701 Median :2.000e+07 Median :2005 Median : 595
Canada : 126 : 303 Mean :3.975e+07 Mean :2002 Mean : 1652
Germany : 97 Not Rated: 116 3rd Qu.:4.500e+07 3rd Qu.:2011 3rd Qu.: 918
Australia: 55 G : 112 Max. :1.222e+10 Max. :2016 Max. :137000
(Other) : 356 (Other) : 232 NA's :492 NA's :108 NA's :13
imdb_score aspect_ratio movie_facebook_likes
Min. :1.600 Min. : 1.18 Min. : 0
1st Qu.:5.800 1st Qu.: 1.85 1st Qu.: 0
Median :6.600 Median : 2.35 Median : 166
Mean :6.442 Mean : 2.22 Mean : 7526
3rd Qu.:7.200 3rd Qu.: 2.35 3rd Qu.: 3000
Max. :9.500 Max. :16.00 Max. :349000
NA's :329
# Count rows that are exact repeats of an earlier row.
sum(duplicated(IMDB))
[1] 45
# Keep only the first occurrence of each fully identical row, then confirm
# the new dimensions.
is_repeat_row <- duplicated(IMDB)
IMDB <- IMDB[!is_repeat_row, ]
dim(IMDB)
[1] 4998 28
# Clean up movie titles: strip the stray "Â" character left by the file's
# encoding, then remove trailing whitespace.
# The trimmed vector must be assigned back to the column; calling str_trim()
# on its own only prints a trimmed copy and leaves IMDB$movie_title unchanged.
IMDB$movie_title <- gsub("Â", "", as.character(IMDB$movie_title))
IMDB$movie_title <- str_trim(IMDB$movie_title, side = "right")
# Echo the cleaned titles for inspection.
IMDB$movie_title
[1] "Avatar"
[2] "Pirates of the Caribbean: At World's End"
[3] "Spectre"
[4] "The Dark Knight Rises"
[5] "Star Wars: Episode VII - The Force Awakens"
[6] "John Carter"
[7] "Spider-Man 3"
[8] "Tangled"
[9] "Avengers: Age of Ultron"
[10] "Harry Potter and the Half-Blood Prince"
[11] "Batman v Superman: Dawn of Justice"
[12] "Superman Returns"
[13] "Quantum of Solace"
[14] "Pirates of the Caribbean: Dead Man's Chest"
[15] "The Lone Ranger"
[16] "Man of Steel"
[17] "The Chronicles of Narnia: Prince Caspian"
[18] "The Avengers"
[19] "Pirates of the Caribbean: On Stranger Tides"
[20] "Men in Black 3"
[21] "The Hobbit: The Battle of the Five Armies"
[22] "The Amazing Spider-Man"
[23] "Robin Hood"
[24] "The Hobbit: The Desolation of Smaug"
[25] "The Golden Compass"
[26] "King Kong"
[27] "Titanic"
[28] "Captain America: Civil War"
[29] "Battleship"
[30] "Jurassic World"
[31] "Skyfall"
[32] "Spider-Man 2"
[33] "Iron Man 3"
[34] "Alice in Wonderland"
[35] "X-Men: The Last Stand"
[36] "Monsters University"
[37] "Transformers: Revenge of the Fallen"
[38] "Transformers: Age of Extinction"
[39] "Oz the Great and Powerful"
[40] "The Amazing Spider-Man 2"
[41] "TRON: Legacy"
[42] "Cars 2"
[43] "Green Lantern"
[44] "Toy Story 3"
[45] "Terminator Salvation"
[46] "Furious 7"
[47] "World War Z"
[48] "X-Men: Days of Future Past"
[49] "Star Trek Into Darkness"
[50] "Jack the Giant Slayer"
[51] "The Great Gatsby"
[52] "Prince of Persia: The Sands of Time"
[53] "Pacific Rim"
[54] "Transformers: Dark of the Moon"
[55] "Indiana Jones and the Kingdom of the Crystal Skull"
[56] "The Good Dinosaur"
[57] "Brave"
[58] "Star Trek Beyond"
[59] "WALL·E"
[60] "Rush Hour 3"
[61] "2012"
[62] "A Christmas Carol"
[63] "Jupiter Ascending"
[64] "The Legend of Tarzan"
[65] "The Chronicles of Narnia: The Lion, the Witch and the Wardrobe"
[66] "X-Men: Apocalypse"
[67] "The Dark Knight"
[68] "Up"
[69] "Monsters vs. Aliens"
[70] "Iron Man"
[71] "Hugo"
[72] "Wild Wild West"
[73] "The Mummy: Tomb of the Dragon Emperor"
[74] "Suicide Squad"
[75] "Evan Almighty"
[76] "Edge of Tomorrow"
[77] "Waterworld"
[78] "G.I. Joe: The Rise of Cobra"
[79] "Inside Out"
[80] "The Jungle Book"
[81] "Iron Man 2"
[82] "Snow White and the Huntsman"
[83] "Maleficent"
[84] "Dawn of the Planet of the Apes"
[85] "The Lovers"
[86] "47 Ronin"
[87] "Captain America: The Winter Soldier"
[88] "Shrek Forever After"
[89] "Tomorrowland"
[90] "Big Hero 6"
[91] "Wreck-It Ralph"
[92] "The Polar Express"
[93] "Independence Day: Resurgence"
[94] "How to Train Your Dragon"
[95] "Terminator 3: Rise of the Machines"
[96] "Guardians of the Galaxy"
[97] "Interstellar"
[98] "Inception"
[99] "Godzilla Resurgence"
[100] "The Hobbit: An Unexpected Journey"
[101] "The Fast and the Furious"
[102] "The Curious Case of Benjamin Button"
[103] "X-Men: First Class"
[104] "The Hunger Games: Mockingjay - Part 2"
[105] "The Sorcerer's Apprentice"
[106] "Poseidon"
[107] "Alice Through the Looking Glass"
[108] "Shrek the Third"
[109] "Warcraft"
[110] "Terminator Genisys"
[111] "The Chronicles of Narnia: The Voyage of the Dawn Treader"
[112] "Pearl Harbor"
[113] "Transformers"
[114] "Alexander"
[115] "Harry Potter and the Order of the Phoenix"
[116] "Harry Potter and the Goblet of Fire"
[117] "Hancock"
[118] "I Am Legend"
[119] "Charlie and the Chocolate Factory"
[120] "Ratatouille"
[121] "Batman Begins"
[122] "Madagascar: Escape 2 Africa"
[123] "Night at the Museum: Battle of the Smithsonian"
[124] "X-Men Origins: Wolverine"
[125] "The Matrix Revolutions"
[126] "Frozen"
[127] "The Matrix Reloaded"
[128] "Thor: The Dark World"
[129] "Mad Max: Fury Road"
[130] "Angels & Demons"
[131] "Thor"
[132] "Bolt"
[133] "G-Force"
[134] "Wrath of the Titans"
[135] "Dark Shadows"
[136] "Mission: Impossible - Rogue Nation"
[137] "The Wolfman"
[138] "Bee Movie"
[139] "Kung Fu Panda 2"
[140] "The Last Airbender"
[141] "Mission: Impossible III"
[142] "White House Down"
[143] "Mars Needs Moms"
[144] "Flushed Away"
[145] "Pan"
[146] "Mr. Peabody & Sherman"
[147] "Troy"
[148] "Madagascar 3: Europe's Most Wanted"
[149] "Die Another Day"
[150] "Ghostbusters"
[151] "Armageddon"
[152] "Men in Black II"
[153] "Beowulf"
[154] "Kung Fu Panda 3"
[155] "Mission: Impossible - Ghost Protocol"
[156] "Rise of the Guardians"
[157] "Fun with Dick and Jane"
[158] "The Last Samurai"
[159] "Exodus: Gods and Kings"
[160] "Star Trek"
[161] "Spider-Man"
[162] "How to Train Your Dragon 2"
[163] "Gods of Egypt"
[164] "Stealth"
[165] "Watchmen"
[166] "Lethal Weapon 4"
[167] "Hulk"
[168] "G.I. Joe: Retaliation"
[169] "Sahara"
[170] "Final Fantasy: The Spirits Within"
[171] "Captain America: The First Avenger"
[172] "The World Is Not Enough"
[173] "Master and Commander: The Far Side of the World"
[174] "The Twilight Saga: Breaking Dawn - Part 2"
[175] "Happy Feet 2"
[176] "The Incredible Hulk"
[177] "Miami Vice"
[178] "The BFG"
[179] "The Revenant"
[180] "Turbo"
[181] "Rango"
[182] "Penguins of Madagascar"
[183] "The Bourne Ultimatum"
[184] "Kung Fu Panda"
[185] "Ant-Man"
[186] "The Hunger Games: Catching Fire"
[187] "Home"
[188] "War of the Worlds"
[189] "Bad Boys II"
[190] "Puss in Boots"
[191] "Salt"
[192] "Noah"
[193] "The Adventures of Tintin"
[194] "Harry Potter and the Prisoner of Azkaban"
[195] "Australia"
[196] "After Earth"
[197] "Dinosaur"
[198] "Harry Potter and the Deathly Hallows: Part II"
[199] "Night at the Museum: Secret of the Tomb"
[200] "Megamind"
[201] "Harry Potter and the Sorcerer's Stone"
[202] "R.I.P.D."
[203] "Pirates of the Caribbean: The Curse of the Black Pearl"
[204] "Harry Potter and the Deathly Hallows: Part I"
[205] "The Hunger Games: Mockingjay - Part 1"
[206] "The Da Vinci Code"
[207] "Rio 2"
[208] "X-Men 2"
[209] "Fast Five"
[210] "Sherlock Holmes: A Game of Shadows"
[211] "Clash of the Titans"
[212] "Total Recall"
[213] "The 13th Warrior"
[214] "The Bourne Legacy"
[215] "Batman & Robin"
[216] "How the Grinch Stole Christmas"
[217] "The Day After Tomorrow"
[218] "Mission: Impossible II"
[219] "The Perfect Storm"
[220] "Fantastic 4: Rise of the Silver Surfer"
[221] "Life of Pi"
[222] "Ghost Rider"
[223] "Jason Bourne"
[224] "Charlie's Angels: Full Throttle"
[225] "Prometheus"
[226] "Stuart Little 2"
[227] "Elysium"
[228] "The Chronicles of Riddick"
[229] "RoboCop"
[230] "Speed Racer"
[231] "How Do You Know"
[232] "Knight and Day"
[233] "Oblivion"
[234] "Star Wars: Episode III - Revenge of the Sith"
[235] "Star Wars: Episode II - Attack of the Clones"
[236] "Monsters, Inc."
[237] "The Wolverine"
[238] "Star Wars: Episode I - The Phantom Menace"
[239] "The Croods"
[240] "Asterix at the Olympic Games"
[241] "Windtalkers"
[242] "The Huntsman: Winter's War"
[243] "Teenage Mutant Ninja Turtles"
[244] "Gravity"
[245] "Dante's Peak"
[246] "Teenage Mutant Ninja Turtles: Out of the Shadows"
[247] "Fantastic Four"
[248] "Night at the Museum"
[249] "San Andreas"
[250] "Tomorrow Never Dies"
[251] "The Patriot"
[252] "Ocean's Twelve"
[253] "Mr. & Mrs. Smith"
[254] "Insurgent"
[255] "The Aviator"
[256] "Gulliver's Travels"
[257] "The Green Hornet"
[258] "The A-Team"
[259] "300: Rise of an Empire"
[260] "The Smurfs"
[261] "Home on the Range"
[262] "Allegiant"
[263] "Real Steel"
[264] "The Smurfs 2"
[265] "Speed 2: Cruise Control"
[266] "Ender's Game"
[267] "Live Free or Die Hard"
[268] "The Lord of the Rings: The Fellowship of the Ring"
[269] "Around the World in 80 Days"
[270] "Ali"
[271] "The Cat in the Hat"
[272] "I, Robot"
[273] "Kingdom of Heaven"
[274] "Stuart Little"
[275] "The Princess and the Frog"
[276] "The Martian"
[277] "10,000 B.C."
[278] "The Island"
[279] "Town & Country"
[280] "Gone in Sixty Seconds"
[281] "Gladiator"
[282] "Minority Report"
[283] "Harry Potter and the Chamber of Secrets"
[284] "Casino Royale"
[285] "Planet of the Apes"
[286] "Terminator 2: Judgment Day"
[287] "Public Enemies"
[288] "American Gangster"
[289] "True Lies"
[290] "The Taking of Pelham 1 2 3"
[291] "Little Fockers"
[292] "The Other Guys"
[293] "Eraser"
[294] "Django Unchained"
[295] "The Hunchback of Notre Dame"
[296] "The Emperor's New Groove"
[297] "The Expendables 2"
[298] "National Treasure"
[299] "Eragon"
[300] "Where the Wild Things Are"
[301] "Epic"
[302] "The Tourist"
[303] "End of Days"
[304] "Blood Diamond"
[305] "The Wolf of Wall Street"
[306] "Batman Forever"
[307] "Starship Troopers"
[308] "Cloud Atlas"
[309] "Legend of the Guardians: The Owls of Ga'Hoole"
[310] "Catwoman"
[311] "Hercules"
[312] "Treasure Planet"
[313] "Land of the Lost"
[314] "The Expendables 3"
[315] "Point Break"
[316] "Son of the Mask"
[317] "In the Heart of the Sea"
[318] "The Adventures of Pluto Nash"
[319] "Green Zone"
[320] "The Peanuts Movie"
[321] "The Spanish Prisoner"
[322] "The Mummy Returns"
[323] "Gangs of New York"
[324] "The Flowers of War"
[325] "Surf's Up"
[326] "The Stepford Wives"
[327] "Black Hawk Down"
[328] "The Campaign"
[329] "The Fifth Element"
[330] "Sex and the City 2"
[331] "The Road to El Dorado"
[332] "Ice Age: Continental Drift"
[333] "Cinderella"
[334] "The Lovely Bones"
[335] "Finding Nemo"
[336] "The Lord of the Rings: The Return of the King"
[337] "The Lord of the Rings: The Two Towers"
[338] "Seventh Son"
[339] "Lara Croft: Tomb Raider"
[340] "Transcendence"
[341] "Jurassic Park III"
[342] "Rise of the Planet of the Apes"
[343] "The Spiderwick Chronicles"
[344] "A Good Day to Die Hard"
[345] "The Alamo"
[346] "The Incredibles"
[347] "Cutthroat Island"
[348] "Percy Jackson & the Olympians: The Lightning Thief"
[349] "Men in Black"
[350] "Toy Story 2"
[351] "Unstoppable"
[352] "Rush Hour 2"
[353] "What Lies Beneath"
[354] "Cloudy with a Chance of Meatballs"
[355] "Ice Age: Dawn of the Dinosaurs"
[356] "The Secret Life of Walter Mitty"
[357] "Charlie's Angels"
[358] "The Departed"
[359] "Mulan"
[360] "Tropic Thunder"
[361] "The Girl with the Dragon Tattoo"
[362] "Die Hard with a Vengeance"
[363] "Sherlock Holmes"
[364] "Ben-Hur"
[365] "Atlantis: The Lost Empire"
[366] "Alvin and the Chipmunks: The Road Chip"
[367] "Valkyrie"
[368] "You Don't Mess with the Zohan"
[369] "Pixels"
[370] "A.I. Artificial Intelligence"
[371] "The Haunted Mansion"
[372] "Contact"
[373] "Hollow Man"
[374] "The Interpreter"
[375] "Percy Jackson: Sea of Monsters"
[376] "Lara Croft Tomb Raider: The Cradle of Life"
[377] "Now You See Me 2"
[378] "The Saint"
[379] "Spy Game"
[380] "Mission to Mars"
[381] "Rio"
[382] "Bicentennial Man"
[383] "Volcano"
[384] "The Devil's Own"
[385] "K-19: The Widowmaker"
[386] "Conan the Barbarian"
[387] "Cinderella Man"
[388] "The Nutcracker in 3D"
[389] "Seabiscuit"
[390] "Twister"
[391] "Cast Away"
[392] "Happy Feet"
[393] "The Bourne Supremacy"
[394] "Air Force One"
[395] "Ocean's Eleven"
[396] "The Three Musketeers"
[397] "Hotel Transylvania"
[398] "Enchanted"
[399] "Hannibal"
[400] "Safe House"
[401] "102 Dalmatians"
[402] "Tower Heist"
[403] "The Holiday"
[404] "Enemy of the State"
[405] "It's Complicated"
[406] "Ocean's Thirteen"
[407] "Open Season"
[408] "Divergent"
[409] "Enemy at the Gates"
[410] "The Rundown"
[411] "Last Action Hero"
[412] "Memoirs of a Geisha"
[413] "The Fast and the Furious: Tokyo Drift"
[414] "Arthur Christmas"
[415] "Meet Joe Black"
[416] "Collateral Damage"
[417] "All That Jazz"
[418] "Mirror Mirror"
[419] "Scott Pilgrim vs. the World"
[420] "The Core"
[421] "Nutty Professor II: The Klumps"
[422] "Scooby-Doo"
[423] "Dredd"
[424] "Click"
[425] "Creepshow"
[426] "Cats & Dogs: The Revenge of Kitty Galore"
[427] "Jumper"
[428] "Hellboy II: The Golden Army"
[429] "Zodiac"
[430] "The 6th Day"
[431] "Bruce Almighty"
[432] "The Expendables"
[433] "Mission: Impossible"
[434] "The Hunger Games"
[435] "The Hangover Part II"
[436] "Batman Returns"
[437] "Over the Hedge"
[438] "Lilo & Stitch"
[439] "Charlotte's Web"
[440] "Deep Impact"
[441] "RED 2"
[442] "The Longest Yard"
[443] "Alvin and the Chipmunks: Chipwrecked"
[444] "Grown Ups 2"
[445] "Get Smart"
[446] "Something's Gotta Give"
[447] "Shutter Island"
[448] "Four Christmases"
[449] "Robots"
[450] "Face/Off"
[451] "Bedtime Stories"
[452] "Road to Perdition"
[453] "Just Go with It"
[454] "Daredevil"
[455] "Con Air"
[456] "Eagle Eye"
[457] "Cold Mountain"
[458] "The Book of Eli"
[459] "Flubber"
[460] "The Haunting"
[461] "Space Jam"
[462] "The Pink Panther"
[463] "The Day the Earth Stood Still"
[464] "Conspiracy Theory"
[465] "Fury"
[466] "Six Days Seven Nights"
[467] "Yogi Bear"
[468] "Spirit: Stallion of the Cimarron"
[469] "Zookeeper"
[470] "Lost in Space"
[471] "The Manchurian Candidate"
[472] "Déjà Vu"
[473] "Hotel Transylvania 2"
[474] "Bewitched"
[475] "Fantasia 2000"
[476] "The Time Machine"
[477] "Mighty Joe Young"
[478] "Swordfish"
[479] "The Legend of Zorro"
[480] "What Dreams May Come"
[481] "Little Nicky"
[482] "The Brothers Grimm"
[483] "Mars Attacks!"
[484] "Evolution"
[485] "The Edge"
[486] "Surrogates"
[487] "Thirteen Days"
[488] "Daylight"
[489] "Walking with Dinosaurs 3D"
[490] "Battlefield Earth"
[491] "Looney Tunes: Back in Action"
[492] "Nine"
[493] "Timeline"
[494] "The Postman"
[495] "Babe: Pig in the City"
[496] "The Last Witch Hunter"
[497] "Red Planet"
[498] "Arthur and the Invisibles"
[499] "Oceans"
[500] "A Sound of Thunder"
[501] "Pompeii"
[502] "Top Cat Begins"
[503] "A Beautiful Mind"
[504] "The Lion King"
[505] "Journey 2: The Mysterious Island"
[506] "Cloudy with a Chance of Meatballs 2"
[507] "Red Dragon"
[508] "Hidalgo"
[509] "Jack and Jill"
[510] "2 Fast 2 Furious"
[511] "The Little Prince"
[512] "The Invasion"
[513] "The Adventures of Rocky & Bullwinkle"
[514] "The Secret Life of Pets"
[515] "The League of Extraordinary Gentlemen"
[516] "Despicable Me 2"
[517] "Independence Day"
[518] "The Lost World: Jurassic Park"
[519] "Madagascar"
[520] "Children of Men"
[521] "X-Men"
[522] "Wanted"
[523] "The Rock"
[524] "Ice Age: The Meltdown"
[525] "50 First Dates"
[526] "Hairspray"
[527] "Exorcist: The Beginning"
[528] "Inspector Gadget"
[529] "Now You See Me"
[530] "Grown Ups"
[531] "The Terminal"
[532] "Constantine"
[533] "Hotel for Dogs"
[534] "Vertical Limit"
[535] "Charlie Wilson's War"
[536] "Shark Tale"
[537] "Dreamgirls"
[538] "Life"
[539] "Be Cool"
[540] "Munich"
[541] "Tears of the Sun"
[542] "Killers"
[543] "The Man from U.N.C.L.E."
[544] "Spanglish"
[545] "Monster House"
[546] "Bandits"
[547] "First Knight"
[548] "Anna and the King"
[549] "Immortals"
[550] "Hostage"
[551] "Titan A.E."
[552] "Hollywood Homicide"
[553] "Soldier"
[554] "Carriers"
[555] "Monkeybone"
[556] "Flight of the Phoenix"
[557] "Unbreakable"
[558] "Minions"
[559] "Sucker Punch"
[560] "Snake Eyes"
[561] "Sphere"
[562] "The Angry Birds Movie"
[563] "Fool's Gold"
[564] "Funny People"
[565] "The Kingdom"
[566] "Talladega Nights: The Ballad of Ricky Bobby"
[567] "Dr. Dolittle 2"
[568] "Braveheart"
[569] "Jarhead"
[570] "The Simpsons Movie"
[571] "The Majestic"
[572] "Driven"
[573] "Two Brothers"
[574] "The Village"
[575] "Doctor Dolittle"
[576] "Signs"
[577] "Shrek 2"
[578] "Cars"
[579] "Runaway Bride"
[580] "xXx"
[581] "The SpongeBob Movie: Sponge Out of Water"
[582] "Ransom"
[583] "Inglourious Basterds"
[584] "Hook"
[585] "Die Hard 2"
[586] "S.W.A.T."
[587] "Sleepy Hollow"
[588] "Vanilla Sky"
[589] "Lady in the Water"
[590] "AVP: Alien vs. Predator"
[591] "Alvin and the Chipmunks: The Squeakquel"
[592] "We Were Soldiers"
[593] "Olympus Has Fallen"
[594] "Star Trek: Insurrection"
[595] "Battle Los Angeles"
[596] "Big Fish"
[597] "Wolf"
[598] "War Horse"
[599] "The Monuments Men"
[600] "The Abyss"
[601] "Wall Street: Money Never Sleeps"
[602] "Dracula Untold"
[603] "The Siege"
[604] "Stardust"
[605] "Seven Years in Tibet"
[606] "The Dilemma"
[607] "Bad Company"
[608] "Doom"
[609] "I Spy"
[610] "Underworld: Awakening"
[611] "Rock of Ages"
[612] "Hart's War"
[613] "Killer Elite"
[614] "Rollerball"
[615] "Ballistic: Ecks vs. Sever"
[616] "Hard Rain"
[617] "Osmosis Jones"
[618] "Legends of Oz: Dorothy's Return"
[619] "Blackhat"
[620] "Sky Captain and the World of Tomorrow"
[621] "Basic Instinct 2"
[622] "Escape Plan"
[623] "The Legend of Hercules"
[624] "The Sum of All Fears"
[625] "The Twilight Saga: Eclipse"
[626] "The Score"
[627] "Despicable Me"
[628] "Money Train"
[629] "Ted 2"
[630] "Agora"
[631] "Mystery Men"
[632] "Hall Pass"
[633] "The Insider"
[634] "The Finest Hours"
[635] "Body of Lies"
[636] "Dinner for Schmucks"
[637] "Abraham Lincoln: Vampire Hunter"
[638] "Entrapment"
[639] "Last Man Standing"
[640] "The X Files"
[641] "The Last Legion"
[642] "Saving Private Ryan"
[643] "Need for Speed"
[644] "What Women Want"
[645] "Ice Age"
[646] "Dreamcatcher"
[647] "Lincoln"
[648] "The Matrix"
[649] "Apollo 13"
[650] "The Santa Clause 2"
[651] "Les Misérables"
[652] "You've Got Mail"
[653] "Step Brothers"
[654] "The Mask of Zorro"
[655] "Due Date"
[656] "Unbroken"
[657] "Space Cowboys"
[658] "Cliffhanger"
[659] "Broken Arrow"
[660] "The Kid"
[661] "World Trade Center"
[662] "Mona Lisa Smile"
[663] "The Dictator"
[664] "Eyes Wide Shut"
[665] "Annie"
[666] "Focus"
[667] "This Means War"
[668] "Blade: Trinity"
[669] "Red Dawn"
[670] "Primary Colors"
[671] "Resident Evil: Retribution"
[672] "Death Race"
[673] "The Long Kiss Goodnight"
[674] "Proof of Life"
[675] "Zathura: A Space Adventure"
[676] "Fight Club"
[677] "We Are Marshall"
[678] "The Missing"
[679] "Hudson Hawk"
[680] "Lucky Numbers"
[681] "I, Frankenstein"
[682] "Oliver Twist"
[683] "Elektra"
[684] "Sin City: A Dame to Kill For"
[685] "Random Hearts"
[686] "Everest"
[687] "Perfume: The Story of a Murderer"
[688] "Austin Powers in Goldmember"
[689] "Astro Boy"
[690] "Jurassic Park"
[691] "Wyatt Earp"
[692] "Clear and Present Danger"
[693] "Dragon Blade"
[694] "Littleman"
[695] "U-571"
[696] "The American President"
[697] "The Love Guru"
[698] "3000 Miles to Graceland"
[699] "The Hateful Eight"
[700] "Blades of Glory"
[701] "Hop"
[702] "300"
[703] "Meet the Fockers"
[704] "Marley & Me"
[705] "The Green Mile"
[706] "Wild Hogs"
[707] "Chicken Little"
[708] "Gone Girl"
[709] "The Bourne Identity"
[710] "GoldenEye"
[711] "The General's Daughter"
[712] "The Truman Show"
[713] "The Prince of Egypt"
[714] "Daddy Day Care"
[715] "2 Guns"
[716] "Cats & Dogs"
[717] "The Italian Job"
[718] "Two Weeks Notice"
[719] "Antz"
[720] "Couples Retreat"
[721] "Days of Thunder"
[722] "Cheaper by the Dozen 2"
[723] "The Scorch Trials"
[724] "Eat Pray Love"
[725] "The Family Man"
[726] "RED"
[727] "Any Given Sunday"
[728] "The Horse Whisperer"
[729] "Collateral"
[730] "The Scorpion King"
[731] "Ladder 49"
[732] "Jack Reacher"
[733] "Deep Blue Sea"
[734] "This Is It"
[735] "Contagion"
[736] "Kangaroo Jack"
[737] "Coraline"
[738] "The Happening"
[739] "Man on Fire"
[740] "The Shaggy Dog"
[741] "Starsky & Hutch"
[742] "Jingle All the Way"
[743] "Hellboy"
[744] "A Civil Action"
[745] "ParaNorman"
[746] "The Jackal"
[747] "Paycheck"
[748] "Up Close & Personal"
[749] "The Tale of Despereaux"
[750] "Rules of Engagement"
[751] "The Tuxedo"
[752] "Under Siege 2: Dark Territory"
[753] "Jack Ryan: Shadow Recruit"
[754] "Joy"
[755] "London Has Fallen"
[756] "Alien: Resurrection"
[757] "Shooter"
[758] "The Boxtrolls"
[759] "Practical Magic"
[760] "The Lego Movie"
[761] "Miss Congeniality 2: Armed and Fabulous"
[762] "Reign of Fire"
[763] "Gangster Squad"
[764] "Year One"
[765] "Invictus"
[766] "State of Play"
[767] "Duplicity"
[768] "My Favorite Martian"
[769] "The Sentinel"
[770] "Planet 51"
[771] "Star Trek: Nemesis"
[772] "Intolerable Cruelty"
[773] "Trouble with the Curve"
[774] "Edge of Darkness"
[775] "The Relic"
[776] "Analyze That"
[777] "Righteous Kill"
[778] "Mercury Rising"
[779] "The Soloist"
[780] "The Legend of Bagger Vance"
[781] "Almost Famous"
[782] "Garfield 2"
[783] "xXx: State of the Union"
[784] "Priest"
[785] "Sinbad: Legend of the Seven Seas"
[786] "Event Horizon"
[787] "Dragonfly"
[788] "The Black Dahlia"
[789] "Flyboys"
[790] "The Last Castle"
[791] "Supernova"
[792] "Winter's Tale"
[793] "The Mortal Instruments: City of Bones"
[794] "Meet Dave"
[795] "Dark Water"
[796] "Edtv"
[797] "Inkheart"
[798] "The Spirit"
[799] "Mortdecai"
[800] "In the Name of the King: A Dungeon Siege Tale"
[801] "Beyond Borders"
[802] "Xi you ji zhi: Sun Wukong san da Baigu Jing"
[803] "The Great Raid"
[804] "Deadpool"
[805] "Holy Man"
[806] "American Sniper"
[807] "Goosebumps"
[808] "Sabrina, the Teenage Witch"
[809] "Just Like Heaven"
[810] "The Flintstones in Viva Rock Vegas"
[811] "Rambo III"
[812] "Leatherheads"
[813] "The Ridiculous 6"
[814] "Did You Hear About the Morgans?"
[815] "The Internship"
[816] "Resident Evil: Afterlife"
[817] "Red Tails"
[818] "Sex and the City"
[819] "The Devil's Advocate"
[820] "That's My Boy"
[821] "DragonHeart"
[822] "After the Sunset"
[823] "Ghost Rider: Spirit of Vengeance"
[824] "Captain Corelli's Mandolin"
[825] "Anger Management"
[826] "The Pacifier"
[827] "Walking Tall"
[828] "Forrest Gump"
[829] "Alvin and the Chipmunks"
[830] "Meet the Parents"
[831] "Pocahontas"
[832] "Superman"
[833] "The Nutty Professor"
[834] "Hitch"
[835] "George of the Jungle"
[836] "American Wedding"
[837] "Captain Phillips"
[838] "Date Night"
[839] "Casper"
[840] "The Equalizer"
[841] "Maid in Manhattan"
[842] "Crimson Tide"
[843] "The Pursuit of Happyness"
[844] "Flightplan"
[845] "Disclosure"
[846] "City of Angels"
[847] "Kill Bill: Vol. 1"
[848] "Bowfinger"
[849] "Stargate SG-1"
[850] "Kill Bill: Vol. 2"
[851] "Tango & Cash"
[852] "Death Becomes Her"
[853] "Shanghai Noon"
[854] "Executive Decision"
[855] "Mr. Popper's Penguins"
[856] "The Forbidden Kingdom"
[857] "Free Birds"
[858] "Alien 3"
[859] "Evita"
[860] "Ronin"
[861] "The Ghost and the Darkness"
[862] "Paddington"
[863] "The Watch"
[864] "The Hunted"
[865] "Instinct"
[866] "Stuck on You"
[867] "Semi-Pro"
[868] "The Pirates! Band of Misfits"
[869] "Changeling"
[870] "Chain Reaction"
[871] "The Fan"
[872] "The Phantom of the Opera"
[873] "Elizabeth: The Golden Age"
[874] "Ãon Flux"
[875] "Gods and Generals"
[876] "Turbulence"
[877] "Imagine That"
[878] "Muppets Most Wanted"
[879] "Thunderbirds"
[880] "Burlesque"
[881] "A Very Long Engagement"
[882] "Lolita"
[883] "Eye See You"
[884] "Blade II"
[885] "Seven Pounds"
[886] "Bullet to the Head"
[887] "The Godfather: Part III"
[888] "Elizabethtown"
[889] "You, Me and Dupree"
[890] "Superman II"
[891] "Gigli"
[892] "All the King's Men"
[893] "Shaft"
[894] "Anastasia"
[895] "Moulin Rouge!"
[896] "Domestic Disturbance"
[897] "Black Mass"
[898] "Flags of Our Fathers"
[899] "Law Abiding Citizen"
[900] "Grindhouse"
[901] "Beloved"
[902] "Lucky You"
[903] "Catch Me If You Can"
[904] "Zero Dark Thirty"
[905] "The Break-Up"
[906] "Mamma Mia!"
[907] "Valentine's Day"
[908] "The Dukes of Hazzard"
[909] "The Thin Red Line"
[910] "The Change-Up"
[911] "Man on the Moon"
[912] "Casino"
[913] "From Paris with Love"
[914] "Bulletproof Monk"
[915] "Me, Myself & Irene"
[916] "Barnyard"
[917] "Deck the Halls"
[918] "The Twilight Saga: New Moon"
[919] "Shrek"
[920] "The Adjustment Bureau"
[921] "Robin Hood: Prince of Thieves"
[922] "Jerry Maguire"
[923] "Ted"
[924] "As Good as It Gets"
[925] "Patch Adams"
[926] "Anchorman 2: The Legend Continues"
[927] "Mr. Deeds"
[928] "Super 8"
[929] "Erin Brockovich"
[930] "How to Lose a Guy in 10 Days"
[931] "22 Jump Street"
[932] "Interview with the Vampire: The Vampire Chronicles"
[933] "Yes Man"
[934] "Central Intelligence"
[935] "Stepmom"
[936] "Daddy's Home"
[937] "Into the Woods"
[938] "Inside Man"
[939] "Payback"
[940] "Congo"
[941] "We Bought a Zoo"
[942] "Knowing"
[943] "Failure to Launch"
[944] "The Ring Two"
[945] "Crazy, Stupid, Love."
[946] "Garfield"
[947] "Christmas with the Kranks"
[948] "Moneyball"
[949] "Outbreak"
[950] "Non-Stop"
[951] "Race to Witch Mountain"
[952] "V for Vendetta"
[953] "Shanghai Knights"
[954] "Unforgotten"
[955] "Curious George"
[956] "Herbie Fully Loaded"
[957] "Don't Say a Word"
[958] "Hansel & Gretel: Witch Hunters"
[959] "Unfaithful"
[960] "I Am Number Four"
[961] "Syriana"
[962] "13 Hours"
[963] "The Book of Life"
[964] "Firewall"
[965] "Absolute Power"
[966] "G.I. Jane"
[967] "The Game"
[968] "Silent Hill"
[969] "The Replacements"
[970] "American Reunion"
[971] "The Negotiator"
[972] "Into the Storm"
[973] "Beverly Hills Cop III"
[974] "Gremlins 2: The New Batch"
[975] "The Judge"
[976] "The Peacemaker"
[977] "Resident Evil: Apocalypse"
[978] "Bridget Jones: The Edge of Reason"
[979] "Out of Time"
[980] "On Deadly Ground"
[981] "The Adventures of Sharkboy and Lavagirl 3-D"
[982] "The Beach"
[983] "Raising Helen"
[984] "Ninja Assassin"
[985] "For Love of the Game"
[986] "A Touch of Frost"
[987] "Striptease"
[988] "Marmaduke"
[989] "Hereafter"
[990] "Murder by Numbers"
[991] "Assassins"
[992] "Hannibal Rising"
[993] "The Story of Us"
[994] "The Host"
[995] "Basic"
[996] "Blood Work"
[997] "The International"
[998] "Escape from L.A."
[999] "Twisted"
[1000] "The Iron Giant"
[ reached getOption("max.print") -- omitted 3998 entries ]
# Number of missing values in each column.
# is.na() on a data frame always returns a logical matrix, so colSums() works
# for any row count; sapply() would collapse to a list or vector for a 0-row
# or 1-row frame and make colSums() fail.
colSums(is.na(IMDB))
color director_name num_critic_for_reviews
0 0 49
duration director_facebook_likes actor_3_facebook_likes
15 103 23
actor_2_name actor_1_facebook_likes gross
0 7 874
genres actor_1_name movie_title
0 0 0
num_voted_users cast_total_facebook_likes actor_3_name
0 0 0
facenumber_in_poster plot_keywords movie_imdb_link
13 0 0
num_user_for_reviews language country
21 0 0
content_rating budget title_year
0 487 107
actor_2_facebook_likes imdb_score aspect_ratio
13 0 327
movie_facebook_likes
0
# Discard every row that lacks a value in any of the four columns below,
# then report the remaining dimensions.
required_cols <- c("gross", "budget", "aspect_ratio", "title_year")
has_required <- complete.cases(IMDB[, required_cols])
IMDB <- IMDB[has_required, ]
dim(IMDB)
[1] 3783 28
# Impute missing poster face counts with the rounded column mean.
mean_fnposter <- mean(IMDB$facenumber_in_poster, na.rm = TRUE)
IMDB$facenumber_in_poster[is.na(IMDB$facenumber_in_poster)] <-
  round(mean_fnposter)

# Facebook-like counts of 0 are treated as missing before imputation.
# Columns are selected by name: in the 28-column layout the positional
# indices c(5, 6, 8, 13, 24, 26) pointed at num_voted_users, title_year and
# imdb_score instead of cast_total_facebook_likes, actor_2_facebook_likes
# and movie_facebook_likes, so zeros in those three were never converted.
zero_is_missing <- c(
  "director_facebook_likes",
  "actor_3_facebook_likes",
  "actor_1_facebook_likes",
  "cast_total_facebook_likes",
  "actor_2_facebook_likes",
  "movie_facebook_likes"
)
IMDB[zero_is_missing] <- lapply(
  IMDB[zero_is_missing],
  # which() skips NA comparisons, so existing NAs are left as they are.
  function(x) replace(x, which(x == 0), NA)
)

# Replace every remaining NA in these columns with the rounded column mean.
mean_imputed <- c("num_critic_for_reviews", "duration", zero_is_missing)
IMDB[mean_imputed] <- lapply(
  IMDB[mean_imputed],
  function(x) replace(x, is.na(x), round(mean(x, na.rm = TRUE)))
)

# Imputation must not change the number of rows or columns.
dim(IMDB)
[1] 3783 28
# Re-check the number of missing values in each column after imputation.
# is.na() on a data frame always returns a logical matrix, so colSums() works
# for any row count; sapply() would collapse to a list or vector for a 0-row
# or 1-row frame and make colSums() fail.
colSums(is.na(IMDB))
color director_name num_critic_for_reviews
0 0 0
duration director_facebook_likes actor_3_facebook_likes
0 0 0
actor_2_name actor_1_facebook_likes gross
0 0 0
genres actor_1_name movie_title
0 0 0
num_voted_users cast_total_facebook_likes actor_3_name
0 0 0
facenumber_in_poster plot_keywords movie_imdb_link
0 0 0
num_user_for_reviews language country
0 0 0
content_rating budget title_year
0 0 0
actor_2_facebook_likes imdb_score aspect_ratio
0 0 0
movie_facebook_likes
0
# Frequency of each value in the color column.
table(IMDB$color)
Black and White Color
2 128 3653
# Remove the color column, then tabulate how often each language occurs.
keep_cols <- names(IMDB) != "color"
IMDB <- IMDB[, keep_cols, drop = FALSE]
table(IMDB[["language"]])
Aboriginal Arabic Aramaic Bosnian Cantonese Chinese Czech Danish
2 2 1 1 1 8 0 1 3
Dari Dutch Dzongkha English Filipino French German Greek Hebrew
2 3 1 3608 1 36 13 0 2
Hindi Hungarian Icelandic Indonesian Italian Japanese Kannada Kazakh Korean
8 1 1 2 7 12 0 1 5
Mandarin Maya Mongolian None Norwegian Panjabi Persian Polish Portuguese
14 1 1 1 4 0 3 0 5
Romanian Russian Slovenian Spanish Swahili Swedish Tamil Telugu Thai
1 1 0 23 0 1 0 1 3
Urdu Vietnamese Zulu
0 1 1
# Remove the language column and confirm the resulting dimensions.
keep_cols <- names(IMDB) != "language"
IMDB <- IMDB[, keep_cols, drop = FALSE]
dim(IMDB)
[1] 3783 26
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
# Add two derived columns: profit (gross minus budget) and the return on
# investment as a percentage of the budget. Then tabulate the profit values.
IMDB <- mutate(
  IMDB,
  profit = gross - budget,
  return_on_investment_perc = (profit / budget) * 100
)
table(IMDB[["profit"]])
-12213298588 -4199788333 -2499804112 -2397701809 -2127109510 -1099560838 -989962610
1 1 1 1 1 1 1
-698312689 -696724557 -553005191 -399545745 -375868702 -299897945 -190641321
1 1 1 1 1 1 1
-188094481 -164334574 -149800772 -149237822 -143826840 -139853928 -136702695
1 1 1 1 1 1 1
-129828140 -128624673 -128620685 -125710090 -120706229 -115035182 -113021005
1 1 1 1 1 2 1
-111931604 -109916481 -109244357 -108858926 -104868170 -103295584 -102725701
1 1 1 1 1 1 1
-101879446 -99344497 -96941620 -96582135 -96407585 -96179906 -95588898
1 1 1 1 1 1 1
-94780265 -93990787 -93153376 -91973129 -90200974 -89809438 -89787380
1 1 1 1 2 1 1
-88958772 -88214518 -88062505 -87207693 -8.7e+07 -86856188 -86753780
1 1 1 1 1 1 1
-85995841 -84593638 -84540684 -83832772 -83406809 -83385977 -83287549
2 1 1 1 1 1 1
-82603302 -80448338 -79860848 -77823100 -77810499 -76985496 -76896216
1 1 1 1 1 1 1
-76227778 -76070659 -75014388 -74992955 -74901420 -74710310 -74679997
2 1 1 2 1 1 1
-74088170 -71007672 -70868670 -69833498 -69590483 -69477903 -69382160
1 1 1 1 1 1 1
-69223741 -67838031 -67047980 -66989354 -66359574 -65267965 -64975525
1 1 1 1 1 1 1
-64928486 -64927587 -64831323 -64148812 -63885779 -62902875 -62684455
1 1 1 1 1 1 1
-62643910 -62406609 -62149904 -61682000 -61357548 -60917086 -60519261
1 1 1 1 1 1 1
-60432117 -60326576 -59977549 -59973647 -59801290 -59768583 -59533912
1 1 1 1 1 1 1
-59331829 -59192738 -59049180 -57435963 -56780252 -56425668 -56403089
1 1 1 1 1 1 1
-56254592 -56073614 -55948241 -55705158 -55464883 -55263886 -54985766
1 1 1 1 1 1 1
-54684676 -54531374 -53559509 -53361326 -52781920 -52526755 -52394332
1 1 1 1 1 1 1
-52305212 -52248021 -51975100 -51249444 -51178721 -51009458 -50781132
1 1 1 1 1 1 1
-50707978 -50616376 -50607905 -50299935 -50231945 -50180506 -50108179
1 1 1 1 1 1 1
-50100362 -49999390 -49976894 -49649808 -49300000 -49244714 -48844258
1 1 1 1 1 1 1
-48816034 -48793865 -48651080 -48343151 -48339916 -48197944 -48012713
1 1 1 1 1 1 1
-47934015 -47824688 -47791061 -47778542 -47468302 -47456299 -47362515
1 1 1 1 1 1 1
-47128307 -47114435 -46917712 -46840695 -46644185 -46604061 -45979146
1 1 1 1 1 1 1
-45940982 -45618284 -45516179 -45451936 -45433254 -45393665 -44986654
1 1 1 1 1 1 1
-44951668 -44925825 -44854891 -44203958 -44197339 -43997807 -43926768
1 1 1 1 1 1 1
-43798339 -43708398 -43528606 -43129431 -43122835 -42845460 -42823835
1 1 1 1 1 1 1
-42718168 -42633738 -42525888 -42500000 -41791922 -41779224 -41771652
1 1 1 1 1 1 1
-41599308 -41472875 -41457582 -41424284 -41400000 -41392993 -41132306
1 1 1 1 1 2 1
-40923185 -40832183 -40713572 -40712370 -40581346 -40557129 -40554783
1 1 1 1 1 1 1
-40415114 -40218121 -40204983 -40008098 -39871022 -39862768 -39842984
1 1 1 1 1 1 1
-39600000 -39488080 -39443804 -39383131 -39371991 -39347474 -39335749
1 1 1 1 1 1 1
-39293214 -39034803 -38972251 -38911390 -38735525 -38476729 -38434505
1 1 1 1 1 1 1
-38359790 -38023633 -37955679 -37948000 -37894825 -37673753 -37566085
1 1 1 1 1 1 1
-37407175 -37376626 -37083113 -36954460 -36894400 -36870545 -36709023
1 1 1 1 1 1 1
-36624819 -36490658 -36460586 -36439040 -36426209 -36142013 -35962961
1 1 1 1 1 1 1
-35946421 -35748872 -35646427 -35610546 -35344410 -35222993 -35164032
1 1 1 1 1 1 1
-34950587 -34931608 -34816875 -34636971 -34547688 -34369221 -34314732
1 1 1 1 1 1 1
-34226481 -33995909 -33982216 -33887833 -33885763 -33873489 -33853591
2 1 1 1 1 1 1
-33795862 -33716560 -33711680 -33649447 -33494358 -33383410 -33328495
1 1 1 2 1 1 1
-33118496 -33068911 -33045595 -32973330 -32945076 -32900538 -32778006
1 1 1 1 1 1 1
-32737712 -32713269 -32675768 -32637500 -32401069 -32368843 -32310542
1 1 1 1 1 2 1
-32245792 -32239920 -32156953 -32100000 -32098491 -32005864 -31968750
1 1 1 1 1 1 1
-31951647 -31770880 -31671949 -31654597 -31631573 -31527637 -31483987
1 1 1 1 1 1 1
-31480159 -31471505 -30978265 -30915039 -30863374 -30784021 -30695163
1 1 1 1 1 1 1
-30619558 -30579748 -30573703 -30524377 -30517805 -30503417 -30410125
1 1 1 1 1 1 1
-30406260 -30348023 -30348000 -30340253 -30161611 -29959707 -29936195
1 1 1 1 1 1 1
-29821223 -29671868 -29649103 -29643614 -29610033 -29593736 -29565557
1 1 1 1 1 1 1
-29511421 -29500737 -29476832 -29462420 -29459980 -29449288 -29359355
1 1 1 1 1 1 1
-29346242 -29304773 -29125385 -29103248 -29100030 -29067940 -29067628
1 1 1 1 1 1 1
-2.9e+07 -28979512 -28935714 -28889425 -28888740 -28834579 -28791977
1 1 1 1 1 1 1
-28675334 -28551183 -28505730 -28437251 -28419913 -28407897 -28342760
1 1 1 1 1 1 1
-28329069 -28169943 -28159583 -28134226 -27999515 -27989147 -27968963
1 1 1 1 1 1 1
-27877751 -27818516 -27805940 -27727253 -27700000 -27699111 -27541731
1 1 1 1 1 1 1
-27481675 -27477648 -27456806 -27339971 -27330724 -27251390 -27130631
1 1 1 1 1 1 1
-27059493 -27051841 -27024351 -26800785 -26675252 -26623494 -26573039
2 1 1 1 1 1 1
-26355679 -26261368 -26161870 -26095018 -26033943 -26003245 -25993274
1 1 1 1 1 1 1
-25976605 -25900341 -25872105 -25747235 -25739083 -25643000 -25641967
1 2 1 1 1 1 1
-25622273 -25551411 -25536708 -25458771 -25314641 -25280070 -25057578
1 1 1 1 1 1 1
-25032818 -24995244 -24990090 -24977277 -24969837 -24943308 -24923864
1 1 1 1 1 1 1
-24922769 -24903810 -24898139 -24878709 -24870885 -24865096 -24862498
1 1 1 1 1 1 1
-24860746 -24832370 -24816338 -24785987 -24768635 -24745828 -24731154
1 1 1 1 1 1 1
-24720320 -24700000 -24697039 -24673965 -24655608 -24599087 -24592750
1 1 1 1 1 1 1
-24549473 -24536691 -24500032 -24326667 -24282247 -24224407 -24222577
1 1 2 1 1 1 1
-24128473 -24123329 -24043621 -24037529 -24008619 -23990820 -23979782
1 1 1 1 1 1 1
-23962091 -23957120 -23948874 -23910635 -23839471 -23813043 -23800483
1 1 1 1 1 1 1
-23720770 -23676031 -23660367 -23594175 -23557749 -23487185 -23463880
1 1 1 1 1 1 1
-23450000 -23445431 -23402879 -23399089 -23368755 -23345419 -23315181
1 1 1 1 1 2 1
-23185810 -23117290 -23100203 -23014913 -22947259 -22932840 -22795993
1 1 1 1 1 1 1
-22777816 -22772060 -22698869 -22673938 -22654469 -22620910 -22556993
1 1 1 1 1 1 1
-22543550 -22528315 -22523765 -22520222 -22509198 -22505025 -22496684
1 1 1 1 1 1 1
-22467699 -22431493 -22332053 -22324928 -22320362 -22236956 -21949883
1 1 1 1 1 1 1
-21926608 -21898858 -21878028 -21866841 -21790660 -21775151 -21735014
1 1 1 1 1 1 1
-21621647 -21598317 -21569645 -21561851 -21527150 -21482203 -21467706
1 2 1 1 1 1 1
-21464425 -21441338 -21383311 -21219958 -21171682 -21162885 -21084535
1 1 1 1 1 1 1
-21017158 -21001676 -20984407 -20949555 -20935667 -20924712 -20901437
2 1 1 1 1 1 1
-20881753 -20848136 -20761389 -20694789 -20656327 -20593594 -20540176
1 1 1 1 2 1 1
-20519686 -20479108 -20412268 -20291812 -20288322 -20203976 -20066330
1 1 1 1 1 1 1
-20041681 -20001930 -2e+07 -19983606 -19982851 -19923562 -19886035
1 1 1 1 1 1 1
-19880795 -19871876 -19865246 -19799588 -19736635 -19700000 -19694930
1 1 1 1 1 1 1
-19693732 -19647348 -19624526 -19596068 -19565399 -19523730 -19513094
1 1 1 1 1 1 1
-19492772 -19483292 -19440070 -19346254 -19345223 -19314120 -19292673
1 1 1 1 1 1 1
-19289545 -19145306 -19119074 -19111857 -19083691 -19044575 -19036988
1 1 1 1 1 1 1
-19034791 -19000897 -18987439 -18918217 -18891180 -18891023 -18851301
1 1 1 1 1 1 1
-18823678 -18816646 -18809982 -18774204 -18752184 -18704226 -18696681
1 1 1 1 1 1 1
-18592056 -18586498 -18569728 -18503478 -18442521 -18435269 -18423913
1 1 1 1 1 1 1
-18353336 -18309470 -18302044 -18287436 -18236870 -18218547 -18125131
1 1 1 1 1 1 2
-18100000 -18058441 -18011004 -1.8e+07 -17954750 -17918075 -17872563
1 1 1 1 1 1 1
-17843067 -17835684 -17754000 -17726628 -17714996 -17707473 -17680813
1 1 1 1 1 1 1
-17646272 -17624564 -17525384 -17430732 -17419365 -17403744 -17399565
1 1 1 1 1 1 1
-17390018 -17363557 -17323302 -17287907 -17282242 -17258404 -17231467
1 1 1 1 1 1 1
-17174509 -17168879 -17092709 -17012353 -16969037 -16939434 -16880121
1 1 1 1 1 1 1
-16880006 -16855194 -16726580 -16702103 -16693834 -16682849 -16675758
1 1 1 1 1 1 1
-16662721 -16624154 -16617691 -16603219 -16601608 -16579086 -16577194
1 1 1 1 1 1 1
-16480373 -16446167 -16406844 -16400000 -16356539 -16250939 -16137932
1 1 1 1 1 1 1
-16076109 -16025347 -16008503 -15998879 -15997045 -15997036 -15981636
1 1 2 1 1 1 1
-15791616 -15765960 -15705099 -15682875 -15667676 -15622726 -15614237
1 1 1 1 1 1 1
-15572796 -15565274 -15559345 -15552522 -15481610 -15430082 -15365723
1 1 1 1 1 1 1
-15307392 -15278000 -15229136 -15218475 -15186564 -15182960 -15147856
1 1 1 1 1 1 1
-15116461 -15111972 -1.5e+07 -14994117 -14979620 -14976909 -14971130
1 1 1 1 1 1 1
-14964155 -14930000 -14926452 -14888185 -14885685 -14882502 -14867345
1 1 1 1 1 1 1
-14843800 -14831605 -14821835 -14816912 -14796956 -14794657 -14724554
1 1 1 1 1 1 1
-14684317 -14675559 -14666342 -14664065 -14646257 -14618493 -14600121
1 1 1 1 1 1 1
-14577793 -14549025 -14491311 -14386444 -14360758 -14353446 -14333738
1 1 1 1 1 1 1
-14318803 -14313571 -14306109 -14298357 -14292206 -14290615 -14280633
1 1 1 1 1 1 1
-14250866 -14247275 -14230040 -14202684 -14182006 -14169043 -14168495
1 1 1 1 1 1 1
-14125795 -14088250 -14072893 -14057170 -14051289 -14024817 -14015257
1 1 1 1 1 1 1
-14003282 -13997244 -13991568 -13960131 -13955468 -13953505 -13936740
1 1 1 1 1 1 1
-13924922 -13912817 -13900000 -13884496 -13870652 -13811358 -13751523
1 1 1 1 1 1 1
-13749680 -13723190 -13705360 -13642958 -13595247 -13594629 -13585465
1 1 1 1 1 1 1
-13579681 -13571117 -13568780 -13564917 -13461796 -13436643 -13414611
1 1 1 1 1 1 1
-13414567 -13390407 -13383001 -13363463 -13334037 -13320925 -13313646
1 1 1 1 1 1 1
-13266274 -13236409 -13215568 -13200000 -13150858 -13069116 -13046268
1 1 1 1 1 1 1
-13045798 -1.3e+07 -12976053 -12946185 -12931913 -12904682 -12879981
1 1 1 1 1 1 1
-12863050 -12803790 -12796359 -12785034 -12733495 -12731341 -12698695
1 1 1 1 1 1 1
-12663533 -12662421 -12617007 -12602611 -12600000 -12587955 -12513862
1 1 1 1 1 1 1
-12471908 -12436000 -12390000 -12339853 -12336018 -12305895 -12305599
1 1 1 1 1 1 1
-12249417 -12247786 -12170266 -12144354 -12122192 -12112543 -12086323
1 1 1 1 1 1 1
-12069815 -12069202 -12048569 -12037424 -1.2e+07 -11985384 -11975825
1 1 1 1 2 1 1
-11961340 -11945720 -11934196 -11906682 -11895931 -11880078 -11858147
1 1 1 1 1 1 1
-11851144 -11849597 -11843275 -11834365 -11827231 -11824630 -11792806
1 1 1 1 1 1 1
-11785798 -11752860 -11747316 -11738519 -11701351 -11689814 -11666024
1 1 1 1 1 1 1
-11643257 -11627590 -11537400 -11530572 -11528148 -11526000 -11500000
1 1 1 1 1 1 1
-11499034 -11491577 -11478165 -11464232 -11451066 -11413624 -11410555
1 1 1 1 1 1 1
-11409500 -11386249 -11352310 -11348338 -11272676 -11220364 -11160617
1 1 1 1 1 1 1
-11139998 -11120689 -11111645 -11096291 -11032429 -11026468 -11011124
1 1 1 1 1 1 1
-1.1e+07 -10993094 -10964434 -10938241 -10922453 -10904697 -10899350
1 1 1 1 1 1 1
-10897090 -10896000 -10890678 -10886412 -10825346 -10822459 -10810486
1 1 1 1 1 1 2
-10806975 -10802214 -10764163 -10740241 -10723500 -10703427 -10683354
1 1 1 1 1 1 1
-10675256 -10673164 -10665355 -10660475 -10647214 -10639452 -10639221
1 1 1 1 1 1 1
-10625822 -10603058 -10514515 -10500000 -10493002 -10482822 -10474724
1 1 1 1 1 1 1
-10439741 -10410556 -10362510 -10297723 -10285518 -10265765 -10265197
1 1 1 1 1 1 1
-10265156 -10260951 -10241539 -10227031 -10221401 -10220716 -10217492
1 1 1 1 1 1 1
-10197239 -10162368 -10151678 -10144863 -10139961 -10120004 -10100000
1 1 1 1 1 1 1
-10053771 -10043599 -10016428 -1e+07 -9996393 -9994439 -9994022
1 1 1 1 1 1 1
-9988165 -9984553 -9980747 -9980652 -9976725 -9975917 -9939047
1 1 1 1 1 1 1
-9933445 -9908458 -9906393 -9871514 -9868383 -9830576 -9816803
1 1 1 1 1 1 1
-9810821 -9800895 -9782502 -9777353 -9773208 -9767063 -9733871
1 1 1 1 1 1 1
-9708376 -9708035 -9693553 -9693285 -9673593 -9634266 -9561700
1 1 1 1 1 1 1
-9552250 -9550442 -9530189 -9528558 -9511128 -9491157 -9468991
1 1 1 1 1 1 1
-9455857 -9450893 -9444652 -9441786 -9430929 -9421473 -9401355
1 1 1 1 1 1 1
-9382401 -9362320 -9352616 -9297917 -9284389 -9258013
1 1 1 1 1 1
[ reached getOption("max.print") -- omitted 2705 entries ]
# Install pastecs only when it is not already available, so that re-running
# the script does not download and reinstall the package every time.
if (!requireNamespace("pastecs", quietly = TRUE)) {
  install.packages("pastecs")
}
Installing package into 'C:/Users/Mayur Mehta/Documents/R/win-library/3.5'
(as 'lib' is unspecified)
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.5/pastecs_1.3.21.zip'
Content type 'application/zip' length 477487 bytes (466 KB)
downloaded 466 KB
package pastecs successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\Mayur Mehta\AppData\Local\Temp\RtmpyIA7Z9\downloaded_packages
library(pastecs)
Attaching package: 'pastecs'
The following objects are masked from 'package:dplyr':
first, last
# Descriptive statistics for the whole data frame.
stat.desc(IMDB)
# Bar chart of the number of movies at each IMDb score; the title is
# right-aligned (hjust = 1).
ggplot(data = IMDB, mapping = aes(x = imdb_score)) +
  geom_bar() +
  xlab("imdb score") +
  ylab("Movie Count") +
  ggtitle("Histogram of Movie imdb score") +
  theme(plot.title = element_text(hjust = 1))
# Keep only the movies scoring at least 3.5, then print the remaining scores.
IMDB <- IMDB[IMDB[["imdb_score"]] >= 3.5, ]
IMDB[["imdb_score"]]
[1] 7.9 7.1 6.8 8.5 6.6 6.2 7.8 7.5 7.5 6.9 6.1 6.7 7.3 6.5 7.2 6.6 8.1 6.7 6.8 7.5 7.0 6.7 7.9 6.1
[25] 7.2 7.7 8.2 5.9 7.0 7.8 7.3 7.2 6.5 6.8 7.3 6.0 5.7 6.4 6.7 6.8 6.3 5.6 8.3 6.6 7.2 7.0 8.0 7.8
[49] 6.3 7.3 6.6 7.0 6.3 6.2 7.2 7.5 8.4 6.2 5.8 6.8 5.4 6.6 6.9 7.3 9.0 8.3 6.5 7.9 7.5 4.8 5.2 6.9
[73] 5.4 7.9 6.1 5.8 8.3 7.8 7.0 6.1 7.0 7.6 6.3 7.8 6.4 6.5 7.9 7.8 6.6 5.5 8.2 6.4 8.1 8.6 8.8 7.9
[97] 6.7 7.8 7.8 6.6 6.1 5.6 6.4 6.1 7.3 6.6 6.3 6.1 7.1 5.5 7.5 7.6 6.4 7.2 6.7 8.0 8.3 6.7 5.9 6.7
[121] 6.7 7.6 7.2 7.1 8.1 6.7 7.0 6.9 5.1 5.8 6.2 7.4 5.8 6.2 7.3 4.2 6.9 6.4 5.4 6.7 5.8 6.9 7.2 6.9
[145] 6.1 5.5 6.6 6.1 6.3 7.2 7.4 7.3 6.1 7.7 6.1 8.0 7.3 7.9 5.5 5.0 7.7 6.6 5.7 5.8 6.0 6.4 6.9 6.4
[169] 7.4 5.5 5.9 6.8 6.8 8.1 6.5 7.2 6.7 8.1 7.6 7.4 7.6 6.7 6.5 6.6 6.7 6.4 5.8 7.4 7.8 6.6 4.9 6.5
[193] 6.2 7.3 7.5 5.6 8.1 6.7 6.6 6.4 7.5 7.3 7.5 5.8 7.5 6.6 6.7 3.7 6.0 6.4 6.1 6.4 5.6 8.0 5.2 7.1
[217] 4.8 7.0 5.4 6.6 6.7 6.2 6.1 5.3 6.3 7.0 7.6 6.7 8.1 6.7 6.5 7.3 6.0 6.1 5.9 7.8 5.8 6.3 4.3 6.4
[241] 6.1 6.5 7.1 6.4 6.5 6.3 7.5 4.9 5.8 6.2 5.5 5.4 5.8 7.1 5.4 3.7 6.7 7.2 8.8 5.8 6.8 3.8 7.1 7.2
[265] 5.9 7.1 8.1 6.9 4.4 6.5 8.5 7.7 7.4 8.0 5.7 8.5 7.0 7.8 7.2 6.4 5.5 6.7 6.1 8.5 6.9 7.3 6.7 6.9
[289] 5.1 6.8 6.7 6.0 5.7 8.0 8.2 5.4 7.2 7.5 7.0 6.0 7.1 5.4 6.1 5.3 7.0 3.8 6.9 7.2 7.3 6.3 7.5 7.6
[313] 6.8 5.2 7.7 6.2 7.7 4.3 6.9 6.6 7.0 6.7 8.2 8.9 8.7 5.5 5.7 6.3 5.9 7.6 6.6 5.3 6.0 8.0 5.6 5.9
[337] 7.3 7.9 6.8 6.6 6.6 7.0 7.0 7.3 5.5 8.5 7.5 7.0 7.8 7.6 7.6 6.8 5.0 7.1 5.5 5.6 7.1 4.9 7.4 5.7
[361] 6.4 5.9 5.5 6.9 6.2 7.0 5.6 7.0 6.8 5.4 6.1 6.7 6.9 8.0 4.4 7.3 6.3 7.7 6.5 7.8 6.4 7.8 5.8 7.1
[385] 7.1 6.8 4.8 6.2 6.9 7.3 6.6 6.9 6.2 6.7 7.6 6.7 6.2 7.3 6.0 7.1 7.1 5.5 5.6 7.5 5.4 4.3 4.9 7.1
[409] 6.4 4.3 6.1 7.0 7.7 5.9 6.7 6.5 7.1 7.3 6.5 7.0 6.8 7.2 6.1 6.7 6.4 4.4 5.4 6.5 6.7 8.1 5.6 6.3
[433] 7.3 6.1 7.7 6.4 6.8 6.6 7.2 6.9 5.2 4.9 6.3 5.6 5.5 6.7 7.6 5.7 4.6 7.0 5.2 5.1 6.6 6.7 7.3 5.9
[457] 5.6 6.5 5.9 7.0 5.3 5.9 6.3 6.3 7.3 5.8 5.2 5.7 5.8 5.6 6.0 5.8 6.0 5.7 6.0 7.8 4.2 5.6 8.2 8.5
[481] 5.8 6.5 7.2 6.7 5.9 7.8 5.9 4.1 6.8 5.8 7.5 6.9 6.5 6.9 7.9 7.4 6.7 7.4 6.9 6.8 6.7 5.1 4.1 7.3
[505] 6.0 7.3 5.4 5.9 7.1 6.0 6.5 5.7 7.6 6.6 5.4 7.3 6.5 6.6 6.6 5.9 6.7 6.1 6.6 6.6 5.3 6.0 4.7 6.1
[529] 7.2 6.4 6.1 5.9 6.0 6.3 5.6 6.4 7.1 6.6 4.6 8.4 7.1 7.4 6.9 4.5 7.1 6.5 5.3 6.7 7.2 7.2 5.5 5.8
[553] 6.0 6.6 8.3 6.7 7.1 6.0 6.9 5.6 5.6 4.5 7.1 6.5 6.4 5.8 8.0 6.2 7.2 6.1 7.6 6.3 6.3 6.3 7.7 7.0
[577] 5.3 5.6 5.2 5.4 6.4 5.9 6.3 6.5 3.6 5.8 6.2 5.4 6.1 4.2 6.7 4.2 6.4 4.9 6.8 7.7 5.6 6.4 7.2 6.0
[601] 5.9 7.9 7.1 5.9 6.2 7.0 5.4 8.6 6.5 6.4 7.6 5.5 7.4 8.7 7.6 5.5 7.6 6.5 6.9 6.7 6.6 7.2 6.4 6.4
[625] 6.0 6.1 6.0 6.4 6.4 7.3 5.2 6.6 6.3 5.9 6.7 5.4 6.4 6.7 6.2 6.1 8.8 7.1 5.7 5.0 5.1 6.9 4.8 6.5
[649] 5.1 7.1 7.5 6.2 6.3 8.1 6.6 6.9 6.1 4.3 6.6 6.8 3.8 5.9 7.9 6.3 5.5 7.7 6.3 7.1 8.5 5.8 8.1 7.9
[673] 7.2 6.3 8.1 7.0 5.5 6.7 5.2 7.0 6.1 6.6 5.5 5.9 5.4 6.4 5.7 6.7 7.1 6.8 6.5 7.6 5.5 6.5 7.0 5.8
[697] 7.3 6.6 4.4 7.7 5.0 7.7 4.4 6.1 5.4 6.8 6.5 7.0 6.3 6.3 6.1 6.1 5.3 5.4 6.2 6.6 5.9 6.3 7.2 6.8
[721] 6.1 7.8 5.0 6.2 6.7 4.9 7.4 6.2 4.9 6.1 6.1 6.4 6.3 6.6 5.7 5.9 6.0 6.1 6.7 6.7 7.9 4.3 5.7 6.7
[745] 6.7 6.1 5.6 6.6 6.9 4.8 6.2 6.0 4.9 5.6 6.1 6.1 4.8 5.5 3.8 6.5 6.7 8.1 4.9 7.3 6.4 6.7 3.6 5.7
[769] 6.0 4.7 6.3 5.9 5.9 7.5 5.6 6.4 6.3 4.3 5.9 5.5 6.2 8.8 5.2 7.0 6.6 7.3 5.6 6.6 5.4 6.3 7.9 6.3
[793] 6.0 7.2 5.1 7.3 8.0 6.2 6.0 6.7 8.1 6.4 8.0 6.3 6.4 6.6 6.4 6.0 6.6 5.9 6.4 6.3 7.3 6.8 7.2 5.7
[817] 6.0 6.5 5.8 5.8 6.7 7.8 5.6 5.8 7.4 6.9 5.5 6.3 4.7 5.6 6.4 4.2 6.4 7.7 6.7 7.7 5.7 7.6 6.4 5.6
[841] 6.8 6.2 5.9 7.1 7.6 5.5 7.0 7.1 7.4 7.6 5.9 5.9 8.0 7.4 5.8 6.3 5.7 5.1 7.6 6.4 7.4 8.2 6.5 5.5
[865] 6.5 5.6 4.6 7.9 7.1 6.9 7.3 7.0 7.7 6.7 6.3 5.8 7.1 7.3 6.4 7.1 7.6 6.8 6.6 6.7 6.1 6.0 7.6 7.1
[889] 5.0 6.2 5.6 7.4 5.0 5.2 7.6 6.6 7.0 5.7 8.2 6.2 6.6 4.7 6.3 6.1 6.7 6.1 7.0 7.4 7.3 5.8 6.7 5.8
[913] 7.8 6.6 6.5 6.7 7.3 5.8 5.5 6.3 7.4 5.9 6.2 5.9 6.5 4.4 3.5 6.6 6.0 6.4 6.5 4.3 4.2 6.5 6.1 6.3
[937] 6.2 5.9 5.9 6.5 6.4 6.5 5.7 8.0 7.3 6.7 7.5 5.4 6.6 7.7 5.8 6.4 5.6 6.0 6.2 5.9 5.1 6.8 6.0 5.1
[961] 5.8 6.2 6.4 4.8 4.9 5.6 5.5 3.7 5.9 6.3 7.6 8.3 6.9 6.7 6.8 7.1 6.4 6.4 7.4 6.4 6.0 6.5 7.8 6.0
[985] 7.0 6.0 6.1 6.8 6.4 4.5 5.8 6.3 5.7 7.2 7.6 4.7 6.6 6.8 7.3 4.8
[ reached getOption("max.print") -- omitted 2738 entries ]
library(ggrepel)
# Print the release-year column for inspection.
IMDB[["title_year"]]
# Budget vs. profit (both in $ millions) for the ten most profitable movies
# released 1980-2000; each point is labelled with the movie title.
IMDB %>%
  filter(title_year %in% 1980:2000) %>%
  arrange(desc(profit)) %>%
  top_n(n = 10, wt = profit) %>%
  ggplot(mapping = aes(x = budget / 1e6, y = profit / 1e6)) +
  geom_point() +
  geom_smooth() +
  geom_text_repel(mapping = aes(label = movie_title)) +
  xlab("Budget $million") +
  ylab("Profit $million") +
  ggtitle("Top 10 Profitable Movies") +
  theme(plot.title = element_text(hjust = 0.5))
# Same budget-vs-profit scatter for the ten most profitable movies released
# 2000-2016, with a smoother and title labels on the points.
IMDB %>%
  filter(title_year %in% 2000:2016) %>%
  arrange(desc(profit)) %>%
  top_n(n = 10, wt = profit) %>%
  ggplot(mapping = aes(x = budget / 1e6, y = profit / 1e6)) +
  geom_point() +
  geom_smooth() +
  geom_text_repel(mapping = aes(label = movie_title)) +
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(
    title = "Top 10 Profitable Movies",
    x = "Budget $million",
    y = "Profit $million"
  )
# Bar chart of the three most profitable movies released 1980-2000, with
# profit shown in $ millions.
# The title previously read "by budget" although the ranking and the y axis
# are profit; it now matches the 2000-2016 chart.
IMDB %>%
  filter(title_year %in% c(1980:2000)) %>%
  arrange(desc(profit)) %>%
  top_n(3, profit) %>%
  ggplot(aes(x = movie_title, y = profit / 1000000)) +
  geom_bar(colour = "blue", stat = "identity", position = position_dodge()) +
  labs(x = "Movie_title", y = "Profit in Millions",
       title = "Histogram of Top 3 movies by Profit in 1980-2000") +
  theme(plot.title = element_text(hjust = 1.0))
# Bar chart of the three most profitable movies released 2000-2016, with
# profit shown in $ millions.
IMDB %>%
  filter(title_year %in% 2000:2016) %>%
  arrange(desc(profit)) %>%
  top_n(n = 3, wt = profit) %>%
  ggplot(mapping = aes(x = movie_title, y = profit / 1e6)) +
  geom_col(colour = "black", position = position_dodge()) +
  xlab("Movie_title") +
  ylab("Profit in Millions") +
  ggtitle("Histogram of Top 3 movies by Profit in 2000-2016") +
  theme(plot.title = element_text(hjust = 1))
# Summarise the genre column (counts per genre combination).
summary(IMDB[["genres"]])
Comedy|Drama|Romance Drama
148 148
Comedy|Drama Comedy
140 131
Comedy|Romance Drama|Romance
129 115
Crime|Drama|Thriller Action|Crime|Thriller
82 55
Action|Crime|Drama|Thriller Action|Adventure|Sci-Fi
50 45
Action|Adventure|Thriller Comedy|Crime
45 43
Horror Crime|Drama
43 42
Drama|Thriller Crime|Drama|Mystery|Thriller
42 41
Action|Adventure|Sci-Fi|Thriller Horror|Thriller
34 34
Horror|Mystery|Thriller Biography|Drama
31 30
Drama|Mystery|Thriller Action|Comedy|Crime
30 27
Adventure|Animation|Comedy|Family|Fantasy Action|Adventure|Fantasy
27 25
Horror|Mystery Biography|Drama|Sport
25 23
Action|Thriller Drama|Sport
22 22
Action|Comedy|Crime|Thriller Adventure|Animation|Comedy|Family
21 21
Biography|Drama|History Action|Crime|Drama|Mystery|Thriller
20 19
Drama|Music Action|Drama|Thriller
19 18
Adventure|Family|Fantasy Comedy|Crime|Drama
17 17
Comedy|Family Mystery|Thriller
17 17
Action|Sci-Fi Comedy|Family|Fantasy
16 16
Biography|Crime|Drama Biography|Drama|Romance
15 15
Crime|Thriller Documentary
15 15
Drama|History|War Drama|Music|Romance
15 15
Drama|War Fantasy|Horror
15 15
Action|Horror|Sci-Fi|Thriller Comedy|Crime|Thriller
14 14
Comedy|Fantasy Comedy|Sport
14 14
Crime|Drama|Romance|Thriller Crime|Mystery|Thriller
14 14
Action|Sci-Fi|Thriller Adventure|Comedy
13 13
Comedy|Drama|Romance|Sport Comedy|Fantasy|Romance
13 13
Action|Adventure|Drama|Thriller Action|Adventure|Fantasy|Sci-Fi
12 12
Action|Comedy Comedy|Music
12 12
Drama|Fantasy|Romance Drama|Mystery|Sci-Fi|Thriller
12 12
Drama|Sci-Fi|Thriller Horror|Sci-Fi|Thriller
12 12
Action|Adventure|Fantasy|Sci-Fi|Thriller Action|Crime|Sci-Fi|Thriller
11 11
Adventure|Comedy|Family|Fantasy Comedy|Drama|Family
11 11
Comedy|Drama|Fantasy|Romance Drama|Horror|Mystery|Thriller
11 11
Action|Adventure|Comedy Adventure|Animation|Family|Fantasy
10 10
Biography|Drama|Music Comedy|Crime|Romance
10 10
Comedy|Family|Romance Drama|Mystery|Romance|Thriller
10 10
Action|Mystery|Thriller Adventure|Comedy|Sci-Fi
9 9
Biography|Drama|History|Romance Comedy|Drama|Music
9 9
Drama|Horror|Thriller Action|Adventure
9 8
Action|Adventure|Comedy|Family|Sci-Fi Action|Adventure|Crime|Thriller
8 8
Action|Comedy|Sci-Fi Action|Drama|War
8 8
Adventure|Drama Adventure|Fantasy
8 8
Comedy|Fantasy|Horror Drama|Family
8 8
Drama|History Drama|History|Thriller
8 8
Drama|Horror|Sci-Fi|Thriller Drama|Mystery
8 8
Drama|Romance|War Fantasy|Horror|Thriller
8 8
Action|Comedy|Crime|Drama|Thriller (Other)
7 1241
# Install formattable only when it is not already available, so that
# re-running the script does not reinstall the package every time.
if (!requireNamespace("formattable", quietly = TRUE)) {
  install.packages("formattable")
}
Installing package into 'C:/Users/Mayur Mehta/Documents/R/win-library/3.5'
(as 'lib' is unspecified)
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.5/formattable_0.2.0.1.zip'
Content type 'application/zip' length 278295 bytes (271 KB)
downloaded 271 KB
package formattable successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\Mayur Mehta\AppData\Local\Temp\RtmpyIA7Z9\downloaded_packages
library(formattable)
# Mean IMDb score per genre combination; the 20 highest averages are shown
# as a left-aligned table with a green bar on the score column.
IMDB %>%
  group_by(genres) %>%
  summarise(avg_imdb = mean(imdb_score)) %>%
  arrange(desc(avg_imdb)) %>%
  top_n(n = 20, wt = avg_imdb) %>%
  formattable(
    list(avg_imdb = color_bar("green")),
    align = "l"
  )
# Summarise the country column (counts per country).
summary(IMDB[["country"]])
Afghanistan Argentina Aruba
0 1 3 1
Australia Bahamas Belgium Brazil
40 0 2 5
Bulgaria Cambodia Cameroon Canada
0 0 0 60
Chile China Colombia Czech Republic
1 14 1 3
Denmark Dominican Republic Egypt Finland
8 0 0 1
France Georgia Germany Greece
104 1 79 1
Hong Kong Hungary Iceland India
13 2 2 10
Indonesia Iran Ireland Israel
1 4 7 2
Italy Japan Kenya Kyrgyzstan
11 17 0 0
Libya Mexico Netherlands New Line
0 8 3 1
New Zealand Nigeria Norway Official site
11 0 4 1
Pakistan Panama Peru Philippines
0 0 1 0
Poland Romania Russia Slovakia
1 2 3 0
Slovenia South Africa South Korea Soviet Union
0 3 8 0
Spain Sweden Switzerland Taiwan
22 1 0 2
Thailand Turkey UK United Arab Emirates
4 0 315 0
USA West Germany
2953 1
# Budget vs. profit (both in $ millions) for the 20 most profitable movies
# released 2012-2016; each point is labelled with its country.
# The title previously said "Top 10" although top_n() keeps 20 rows, and the
# points are movies rather than countries.
IMDB %>%
  filter(title_year %in% c(2012:2016)) %>%
  arrange(desc(profit)) %>%
  top_n(20, profit) %>%
  ggplot(aes(x = budget / 1000000, y = profit / 1000000)) +
  geom_point() +
  geom_smooth() +
  geom_text_repel(aes(label = country)) +
  labs(x = "Budget $million", y = "Profit $million",
       title = "Top 20 Profitable Movies by Country") +
  theme(plot.title = element_text(hjust = 0.5))
# Install plotly only when it is not already available, so that re-running
# the script does not reinstall the package every time.
if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly")
}
library(plotly)
# Interactive scatter of IMDb score by country; points are coloured and
# hover-labelled by content rating and drawn semi-transparent.
plot_ly(
  data = IMDB,
  x = ~country,
  y = ~imdb_score,
  color = ~content_rating,
  text = ~content_rating,
  type = "scatter",
  mode = "markers",
  alpha = 0.7
)
n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
# Install GGally only when it is not already available, so that re-running
# the script does not reinstall the package every time.
if (!requireNamespace("GGally", quietly = TRUE)) {
  install.packages("GGally")
}
library(GGally)
# Mean gross per genre combination; the 20 highest averages are shown as a
# left-aligned table with a green bar on the gross column.
IMDB %>%
  group_by(genres) %>%
  summarise(avg_gross = mean(gross)) %>%
  arrange(desc(avg_gross)) %>%
  top_n(n = 20, wt = avg_gross) %>%
  formattable(
    list(avg_gross = color_bar("green")),
    align = "l"
  )
# Bin the IMDb score into four intervals: (0,4], (4,6], (6,8], (8,10].
IMDB[["quality_group"]] <- cut(IMDB[["imdb_score"]], breaks = c(0, 4, 6, 8, 10))
# Install randomForest only when it is not already available, so that
# re-running the script does not reinstall the package every time.
if (!requireNamespace("randomForest", quietly = TRUE)) {
  install.packages("randomForest")
}
library(randomForest)
# Fit a random forest that predicts the score bin from every other column
# except the raw IMDb score, trying 5 variables at each split.
# NOTE(review): `train` is not created in the visible part of this script and
# caret exports a function with that name - confirm a training data frame
# called `train` exists at this point.
set.seed(53)
rf <- randomForest(
  quality_group ~ . - imdb_score,
  data = train,
  mtry = 5
)
# Plot the model's error rates and label the curves with the error-rate
# column names.
plot(rf)
legend("topright", legend = colnames(rf$err.rate), col = 1:5, fill = 1:5)
# Bar chart of profit ($ millions) against genre for the three most
# profitable movies released 1980-2000.
IMDB %>%
  filter(title_year %in% 1980:2000) %>%
  arrange(desc(profit)) %>%
  top_n(n = 3, wt = profit) %>%
  ggplot(mapping = aes(x = genres, y = profit / 1e6)) +
  geom_col(colour = "black", position = position_dodge()) +
  xlab("genres") +
  ylab("Profit in Millions") +
  ggtitle("Histogram of Top 3 genres by Profit in 1980-2000") +
  theme(plot.title = element_text(hjust = 1))
# Bar chart of profit ($ millions) against genre for the three most
# profitable movies released 2000-2016.
IMDB %>%
  filter(title_year %in% 2000:2016) %>%
  arrange(desc(profit)) %>%
  top_n(n = 3, wt = profit) %>%
  ggplot(mapping = aes(x = genres, y = profit / 1e6)) +
  geom_col(colour = "red", position = position_dodge()) +
  theme(plot.title = element_text(hjust = 1)) +
  labs(
    title = "Histogram of Top 3 genres by Profit in 2000-2016",
    x = "genres",
    y = "Profit in Millions"
  )
# Movies released in 2000 or later.
# The subset is created first: previously IMDB1 was printed and mutated before
# it existed, and the later subset assignment discarded `profit1`, which the
# ranking below needs.
IMDB1 <- IMDB[IMDB$title_year >= 2000, ]
IMDB1$title_year
# Add profit and percentage return on investment for this subset.
IMDB1 <- IMDB1 %>%
  mutate(profit1 = gross - budget,
         return_on_investment_perc1 = (profit1 / budget) * 100)
# Ten most profitable movies of the period, and their profit values.
profit_new <- head(arrange(IMDB1, desc(profit1)), n = 10)
profit_new
top_profit1 <- profit_new$profit1
top_profit1
# Movies released before 2000, with profit and percentage return on
# investment added as new columns.
IMDB2 <- IMDB[IMDB[["title_year"]] < 2000, ]
IMDB2 <- mutate(
  IMDB2,
  profit2 = gross - budget,
  return_on_investment_perc1 = (profit2 / budget) * 100
)
# Ten most profitable movies of the period, and their profit values.
profit_old <- IMDB2 %>%
  arrange(desc(profit2)) %>%
  head(n = 10)
profit_old
top_profit2 <- profit_old[["profit2"]]
top_profit2
[1] 458672302 449935665 424449459 377783777 359544677 293784000 276625409 274691196 272158751
[10] 267761243
# Ten movies with the largest budgets, and their budget values.
budget1 <- IMDB %>%
  arrange(desc(budget)) %>%
  head(n = 10)
budget1
top_budget <- budget1[["budget"]]
top_budget
[1] 12215500000 4200000000 2500000000 2400000000 2127519898 1100000000 1000000000 700000000
[9] 700000000 553632000
# Put the three top-10 vectors side by side and draw both profit series
# (red: 2000 onwards, green: before 2000) against the top budgets, all in
# millions.
df <- data.frame(top_budget, top_profit1, top_profit2)
g <- ggplot(df, aes(x = top_budget / 1000000)) +
  geom_line(aes(y = top_profit1 / 1000000), colour = "red") +
  geom_line(aes(y = top_profit2 / 1000000), colour = "green")
g
# ---- Linear regression on gross ----
# This section had several statements collapsed onto single lines, curly
# quotes as string delimiters, `$` garbled into `\(` / `\)`, and a stray
# markdown fence; none of it parsed as R. The statements are restored one per
# line with the same names and arguments.
# NOTE(review): `movie` is not created in the visible part of this script;
# presumably it is the cleaned movie data frame - confirm before running.

# Fit gross on the numeric predictors using all rows.
sample.reg.model.best <- lm(
  gross ~ num_critic_for_reviews + director_facebook_likes +
    actor_3_facebook_likes + actor_1_facebook_likes + num_voted_users +
    cast_total_facebook_likes + num_user_for_reviews + budget +
    actor_2_facebook_likes + imdb_score,
  data = movie
)
summary(sample.reg.model.best)

movie.data <- movie

# 80/20 train/test split on gross.
library(caret)
set.seed(105)
index <- createDataPartition(movie.data$gross, p = 0.8, list = FALSE)
train.data <- movie.data[index, ]
test.data <- movie.data[-index, ]

# Refit the same model on the training rows only.
train.reg.model <- lm(
  gross ~ num_critic_for_reviews + director_facebook_likes +
    actor_3_facebook_likes + actor_1_facebook_likes + num_voted_users +
    cast_total_facebook_likes + num_user_for_reviews + budget +
    actor_2_facebook_likes + imdb_score,
  data = train.data
)
summary(train.reg.model)

# Predictions for the test rows with confidence intervals.
pred.gross.1 <- predict(train.reg.model, test.data, interval = "confidence")
head(pred.gross.1, n = 10)

# Same linear model trained through caret with 10-fold cross-validation.
library(caret)
cross.valid <- trainControl(method = "cv", number = 10)
model.cross.valid <- train(
  gross ~ num_critic_for_reviews + director_facebook_likes +
    actor_3_facebook_likes + actor_1_facebook_likes + num_voted_users +
    cast_total_facebook_likes + num_user_for_reviews + budget +
    actor_2_facebook_likes + imdb_score,
  data = train.data,
  trControl = cross.valid,
  method = "lm"
)
model.cross.valid

model.pred.cv <- predict(model.cross.valid, newdata = test.data)

# Mean squared error and its square root on the test rows.
cat("Test MSE value for the cross validated model is :")
mean((model.pred.cv - test.data$gross)^2)
cat("\nThe Test RMSE value for the cross validated model is :\n")
sqrt(mean((model.pred.cv - test.data$gross)^2))

cat("Cross Validated Model has a lower RMSE for Test Data set. This indicates that the model is a good one!")
##############################RANDOM FORESTS###########################
# Bin the IMDb score into four intervals: (0,4], (4,6], (6,8], (8,10].
IMDB[["Movie_Quality"]] <- cut(IMDB[["imdb_score"]], breaks = c(0, 4, 6, 8, 10))
# Keep 15 columns, selected by position, and give them short names.
# NOTE(review): the positions assume a specific column layout of IMDB (with
# Movie_Quality at position 15) - confirm they match the data frame as it
# stands at this point in the script.
IMDB <- IMDB[, c(9, 4, 5, 14, 12, 2, 3, 13, 1, 6, 10, 7, 8, 11, 15)]
names(IMDB) <- c(
  "budget", "gross", "user_vote", "critic_review_ratio",
  "movie_fb", "director_fb", "actor1_fb", "other_actors_fb",
  "duration", "face_number", "year", "country", "content",
  "imdb_score", "Movie_Quality"
)
# Random 80/20 split by row name: sampled rows form the training set and the
# remaining rows the test set.
train.index.new <- sample(rownames(IMDB), nrow(IMDB) * 0.8)
test.index.new <- setdiff(rownames(IMDB), train.index.new)
train.new <- IMDB[train.index.new, ]
test.new <- IMDB[test.index.new, ]
library(randomForest)
# Fit a random forest that predicts Movie_Quality from every other column
# except the raw IMDb score, trying 5 variables at each split.
set.seed(53)
rf.new <- randomForest(
  Movie_Quality ~ . - imdb_score,
  data = train.new,
  mtry = 5
)
# Model error plot, with the curves labelled by the error-rate column names.
plot(rf.new)
legend("topright", legend = colnames(rf.new$err.rate), col = 1:5, fill = 1:5)
# NOTE(review): this summarises `rf`, the forest fitted earlier in the
# script, not `rf.new` - confirm that is intended.
summary(rf)
# Variable importance of both forests.
importance <- importance(rf)
importance.new <- importance(rf.new)
# One row per variable, holding its mean decrease in Gini rounded to two
# decimals.
varImportance <- data.frame(
  Variables = row.names(importance),
  Importance = round(importance[, "MeanDecreaseGini"], 2)
)
varImportance.new <- data.frame(
  Variables = row.names(importance.new),
  Importance = round(importance.new[, "MeanDecreaseGini"], 2)
)
plot(varImportance.new)
# Test prediction accuracy
set.seed(633)
# Apply the fitted forest to the held-out test rows.
rf.pred.test.new <- predict(rf.new, test.new)
# Confusion matrix of predicted vs. actual quality bins. The actual labels are
# in `Movie_Quality`, the name assigned when the columns were renamed; the
# data frame has no `binned_score` column, so the previous reference was NULL.
confusionMatrix(rf.pred.test.new, test.new$Movie_Quality)